MadDF <- filter(FullDF,Site=="Madison")%>%
mutate(FlagedOutliers = IdentifyOutliers(N1,Bin = 21, Action = "Flag"),
#Manual flagging that method misses due to boundary effect with binning
FlagedOutliers = ifelse(Date == mdy("01/26/2021") | Date == mdy("01/27/2021"),
TRUE, FlagedOutliers),
NoOutlierVar = ifelse(FlagedOutliers, NA, N1))
OutlierDF <- MadDF%>%
filter(FlagedOutliers)
FullDFMassRatio <- FullDF%>%
filter(!is.na(N1))%>%
mutate(SC2_mass = (3.785*1e6)*FlowRate*N1)
MissingIntercepter <- FullDFMassRatio%>%
filter(Site!="Madison",!is.na(FlowRate)|!is.na(N1))%>%
group_by(Date)%>%
summarize(N = n())%>%
filter(N!=5)
FullDFMassRatio.Mad <- FullDFMassRatio%>%
filter(!is.na(SC2_mass))%>%
filter(Site=="Madison")
FullDFMassRatio.Inter <- FullDFMassRatio%>%
filter(!is.na(SC2_mass))%>%
filter(Site!="Madison")#TempErrorMetric
TempErrorMetric <- FullDFMassRatio.Inter%>%
group_by(Date)%>%
summarise(InterSum = sum(SC2_mass),InterSumFlow = sum(FlowRate))%>%
inner_join(FullDFMassRatio.Mad)%>%
mutate(MassRatio = SC2_mass/InterSum,
ErrorRatio = 2*(SC2_mass-InterSum)/(SC2_mass+InterSum),
MassRatioFlow = FlowRate/InterSumFlow,
ErrorRatioFlow = 2*(FlowRate-InterSumFlow)/(FlowRate+InterSumFlow))
FullDFMassRatio.Mad <- FullDFMassRatio%>%
filter(!is.na(SC2_mass))%>%
filter(Site=="Madison")%>%
inner_join(FullDFMassRatio.Inter["Date"])
FullDFMassRatio.Inter <- FullDFMassRatio%>%
filter(Site!="Madison")
KeyOulierPoints <- c(ymd("2021-06-08"),
ymd("2021-10-17"),
ymd("2021-05-02"),
ymd("2021-01-26"))
NonOuliersDF <- MadDF%>%
mutate(Outlier = ifelse(FlagedOutliers,N1,NA))%>%
mutate(N1 = NoOutlierVar)%>%
filter(!(is.na(N1)))
OutLierPlotDF <- MadDF%>%
mutate(Outlier = ifelse(FlagedOutliers,N1,NA))%>%
mutate(N1 = NoOutlierVar)%>%
filter(!(is.na(N1)&is.na(Outlier)))%>%
ggplot(aes(x=Date))+#Data depends on time
geom_line(aes(y=N1,
color="N1",
info = N1),data=NonOuliersDF)+#compares N1
geom_point(aes(y=Outlier,
color="N1 Outlier",
info = Outlier))+
theme_light() +
scale_color_manual(values=c("#F8766D","#999999"))
ggplotly(OutLierPlotDF)
#"2021-06-08","2021-10-17","2021-05-02","2021-01-26"
Sum of interceptor flows agrees with composite flow except for dates with missing flow information for some interceptors. Purple boxes beneath data are to communicate the Dates with interceptor data partially missing.
MissingFlowData <- FullDFMassRatio.Inter%>%
group_by(Date)%>%
summarise(n=n())%>%
filter(n!=5)%>%
mutate(MissingSite=TRUE)
FlowPlot <- FullDFMassRatio.Inter%>%
full_join(MissingFlowData)%>%
mutate(MissingSite=ifelse(is.na(MissingSite),FALSE,MissingSite))%>%
filter(!is.na(FlowRate))%>%
ggplot()+
geom_col(aes(x=Date,y=FlowRate,fill=Site,shape=MissingSite),position="stack", stat="identity", width = 3) +
geom_col(aes(x=Date,y=-.5),data=MissingFlowData,color="Purple",fill="Purple", width = 3)+
scale_fill_brewer(palette="Spectral") +
scale_color_manual(values=c("#d60000", "#ffa600", "#7ed05c", "#5c9c43", "#3a56d6")) +
theme_light() +
geom_point(data=FullDFMassRatio.Mad,aes(x=Date,y=FlowRate),size=2)+
ylab("Average daily flow (MGD)") +
geom_hline(yintercept=median(FullDFMassRatio.Mad$FlowRate), linetype = "dashed", color="black") +
ggtitle("Average daily flow (MMSD)")
ggplotly(FlowPlot)%>%
add_annotations( text="Site, Full Data For Day", xref="paper", yref="paper",
x=1.02, xanchor="left",
y=0.8, yanchor="bottom", # Same y as legend below
legendtitle=TRUE, showarrow=FALSE ) %>%
layout( legend=list(y=0.8, yanchor="top" ) )
MassBalencePlot <- FullDFMassRatio.Inter%>%
full_join(MissingFlowData)%>%
mutate(MissingSite=ifelse(is.na(MissingSite),FALSE,MissingSite))%>%
#filter(!is.na(FlowRate))%>%
#filter(!MissingSite)%>%
ggplot()+
geom_col(aes(x=Date,y=SC2_mass,fill=Site,shape=MissingSite),position="stack", stat="identity", width = 3) +
geom_col(aes(x=Date,y=-3e12),data=MissingFlowData,color="Purple",fill="Purple", width = 3)+
scale_fill_brewer(palette="Spectral") +
scale_color_manual(values=c("#d60000", "#ffa600", "#7ed05c", "#5c9c43", "#3a56d6")) +
theme_light() +
geom_point(data=FullDFMassRatio.Mad,aes(x=Date,y=SC2_mass),size=1)+
ylab("N1 gene copies per day") +
ggtitle("SARS-CoV-2, 24h Mass Loading (Madison)")
ggplotly(MassBalencePlot)
#4-15
#N1/N1 travel to find more outliers
#Present puzzles to WSLH
#distribution of residuals
#if normalized resid should be iid
arranged plots with lines to communicate where flagged outliers are. Red Lines show Where there was a flagged on a day that also has collected interceptor data.
StartRange <- mdy("1-20-2021")
EndRange <- mdy("1-10-2022")
AllOuliersDates <- OutlierDF[["Date"]]
OutlierDatesIntercepters <- unique(inner_join(FullDFMassRatio.Inter,OutlierDF["Date"],by=c("Date"))$Date)
a <- OutLierPlotDF+
scale_x_date(limits = c(StartRange,EndRange))+
geom_vline(xintercept = AllOuliersDates)+
geom_vline(xintercept = OutlierDatesIntercepters,color="Red")+
#geom_vline(xintercept = MissingIntercepter$Date,color="Red")+
theme(title = element_blank(),
axis.text.x = element_blank(),
axis.title.x = element_blank(),
legend.position="none")
b <- MassBalencePlot+
scale_x_date(limits = c(StartRange,EndRange))+
geom_vline(xintercept = (AllOuliersDates))+
geom_vline(xintercept = (OutlierDatesIntercepters),color="Red")+
#geom_vline(xintercept = MissingIntercepter$Date,color="Red")+
theme(title = element_blank(),
axis.text.x = element_blank(),
axis.title.x = element_blank(),
legend.position="none")
c <- FlowPlot+
scale_x_date(limits = c(StartRange,EndRange))+
geom_vline(xintercept = AllOuliersDates)+
geom_vline(xintercept = OutlierDatesIntercepters,color="Red")+
theme(title = element_blank(),
legend.position="none")
ggarrange(a,b,c,nrow=3,align ="v",legend.grob=get_legend(MassBalencePlot),legend="right")
#Cause of missing data
OutlierDatesIntercepters
## [1] "2021-04-15" "2021-04-26" "2021-07-26" "2021-12-13"